In [1]:
%pylab
%matplotlib inline
In [2]:
cd ..
In [7]:
import sys
import numpy as np
import skimage
import cv2
import sklearn
import sklearn.preprocessing  # submodules used below are not loaded by `import sklearn` alone
import sklearn.decomposition
import sklearn.cross_validation
import sklearn.linear_model
import sklearn.metrics
In [4]:
from IPython.display import display
from IPython.display import Image
from IPython.display import HTML
In [5]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
In [6]:
import neukrill_net.utils as utils
import neukrill_net.image_processing as image_processing
import neukrill_net.bagofwords as bagofwords
In [8]:
# Load the settings
settings = utils.Settings('settings.json')
In [9]:
rawdata, labels = utils.load_rawdata(settings.image_fnames, classes=settings.classes)
In [10]:
label_encoder = sklearn.preprocessing.LabelEncoder()
y = label_encoder.fit_transform(labels)
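The encoder maps each class name to an integer; the original names can be recovered with inverse_transform. A quick round-trip check (an addition, not in the original run):
In [ ]:
# Sanity check: decode the first few encoded labels back to class names
print(label_encoder.inverse_transform(y[:5]))
print(labels[:5])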
In [11]:
bow_options = {
    'verbose': True,
    'n_features_max': 100,
    'patch_size': 15,
    'clusteralgo': 'kmeans',
    'n_clusters': 20,
    'random_seed': 42,
}
In [23]:
bow = bagofwords.Bow(normalise_hist=False, **bow_options)
In [13]:
# Sample 1000 image indices (with replacement) for building the vocabulary;
# np.random.random_integers is deprecated, so use randint instead
sample = np.random.randint(0, len(rawdata), size=1000)
In [24]:
bow.build_vocabulary([rawdata[i] for i in sample])
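build_vocabulary is neukrill_net's own method; conceptually it is the standard bag-of-words vocabulary step of extracting local descriptors from the sampled images and clustering them into visual words. A minimal sketch of that idea with OpenCV and scikit-learn (the ORB descriptor and every parameter here are assumptions, not necessarily what Bow does internally):
In [ ]:
# Hedged sketch only -- not the actual Bow internals.
# ORB descriptors are an assumption; Bow may use a different detector.
import sklearn.cluster
orb = cv2.ORB()  # cv2.ORB_create() on OpenCV 3+
descriptors = []
for i in sample:
    keypoints = orb.detect(rawdata[i], None)
    keypoints, dsc = orb.compute(rawdata[i], keypoints)
    if dsc is not None:
        descriptors.append(dsc)
vocab = sklearn.cluster.KMeans(n_clusters=bow_options['n_clusters'],
                               random_state=bow_options['random_seed'])
vocab.fit(np.vstack(descriptors).astype(np.float64))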
Setup
In [37]:
# For each sampled image, get all keypoint descriptors
dscdata = [bow.describeImage(rawdata[i]) for i in sample]
# Drop the None entries from images without any keypoints
dscdata = [x for x in dscdata if x is not None]
# Stack so every keypoint from every image is one row
dscdata = np.vstack(dscdata)
# Assign each descriptor to its nearest visual word (cluster centre)
dscclass = bow.cluster.predict(dscdata)
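Each row of dscdata is one keypoint descriptor and dscclass holds its visual-word index. A quick look at how evenly the keypoints spread over the 20 clusters (a diagnostic, not in the original):
In [ ]:
# Count keypoints assigned to each visual word
print(np.bincount(dscclass, minlength=bow_options['n_clusters']))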
PCA
In [55]:
reduced_data = sklearn.decomposition.PCA(n_components=3).fit_transform(dscdata)
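It is worth checking how much variance three components actually capture before reading anything into the scatter plots (an addition, not in the original run):
In [ ]:
pca = sklearn.decomposition.PCA(n_components=3).fit(dscdata)
print(pca.explained_variance_ratio_)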
In [56]:
plt.scatter(reduced_data[:,0], reduced_data[:,1], c=dscclass, alpha=0.5, linewidths=0, s=1)
plt.show()
plt.scatter(reduced_data[:,0], reduced_data[:,2], c=dscclass, alpha=0.5, linewidths=0, s=1)
plt.show()
plt.scatter(reduced_data[:,1], reduced_data[:,2], c=dscclass, alpha=0.5, linewidths=0, s=1)
plt.show()
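The same three pairwise scatter plots are repeated verbatim for ICA and factor analysis below; a small helper (an addition, not in the original notebook) would remove that repetition:
In [ ]:
def plot_component_pairs(Z, c):
    # Scatter each pair of the three components, coloured by visual word
    for i, j in [(0, 1), (0, 2), (1, 2)]:
        plt.scatter(Z[:, i], Z[:, j], c=c, alpha=0.5, linewidths=0, s=1)
        plt.show()

# e.g. plot_component_pairs(reduced_data, dscclass)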
ICA
In [53]:
reduced_data = sklearn.decomposition.FastICA(n_components=3).fit_transform(dscdata)
In [54]:
plt.scatter(reduced_data[:,0], reduced_data[:,1], c=dscclass, alpha=0.5, linewidths=0, s=1)
plt.show()
plt.scatter(reduced_data[:,0], reduced_data[:,2], c=dscclass, alpha=0.5, linewidths=0, s=1)
plt.show()
plt.scatter(reduced_data[:,1], reduced_data[:,2], c=dscclass, alpha=0.5, linewidths=0, s=1)
plt.show()
Factor Analysis
In [57]:
reduced_data = sklearn.decomposition.FactorAnalysis(n_components=3).fit_transform(dscdata)
In [58]:
plt.scatter(reduced_data[:,0], reduced_data[:,1], c=dscclass, alpha=0.5, linewidths=0, s=1)
plt.show()
plt.scatter(reduced_data[:,0], reduced_data[:,2], c=dscclass, alpha=0.5, linewidths=0, s=1)
plt.show()
plt.scatter(reduced_data[:,1], reduced_data[:,2], c=dscclass, alpha=0.5, linewidths=0, s=1)
plt.show()
In [27]:
# Bag-of-words histogram for every training image
X = [bow.compute_image_bow(img) for img in rawdata]
X = np.vstack(X)
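X should come out as one row per image and one column per visual word. A cheap sanity check (an addition, not in the original):
In [ ]:
# Expect (number of images, number of visual words)
print(X.shape, len(rawdata), bow_options['n_clusters'])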
In [16]:
cv = sklearn.cross_validation.StratifiedShuffleSplit(y)
In [17]:
clf = sklearn.linear_model.LogisticRegression()
In [28]:
X[:10]
In [26]:
bow.compute_image_bow(rawdata[0])
In [29]:
print('Cross-validating')
results = []
for train, test in cv:
    # Make a new BOW encoding
    #bow = bagofwords.Bow(**bow_options)
    #bow.build_vocabulary([rawdata[i] for i in train])
    #X = [bow.compute_image_bow(img) for img in rawdata]
    clf.fit(X[train], y[train])
    p = clf.predict_proba(X[test])
    res = sklearn.metrics.log_loss(y[test], p)
    print(res)
    results.append(res)
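Averaging over the folds gives a single number to compare against other models (an addition, not in the original run):
In [ ]:
print('log loss: mean %.4f, std %.4f' % (np.mean(results), np.std(results)))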
Try to predict classes of test data
In [30]:
print('Fitting clf to all training data')
clf.fit(X, y)
In [ ]:
print('Loading the raw test data')
rawtest, names = utils.load_rawdata(settings.image_fnames)
In [34]:
print('Bagging words for raw test data')
X2 = [bow.compute_image_bow(img) for img in rawtest]
X2 = np.vstack(X2)
p = clf.predict_proba(X2)
In [32]:
len(settings.image_fnames['test'])
In [33]:
X2.shape
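With p computed, the usual last step is writing a submission file: the image name plus one probability column per class. A sketch under the assumption that names holds the test image filenames; label_encoder.classes_ matches the column order of predict_proba, since y came from this encoder:
In [ ]:
import csv
with open('submission.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['image'] + list(label_encoder.classes_))
    for name, row in zip(names, p):
        writer.writerow([name] + list(row))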